For this analysis we are using the Climate Change: Earth Surface Temperature Data from kaggle. This data set can be found here: https://www.kaggle.com/berkeleyearth/climate-change-earth-surface-temperature-data#GlobalLandTemperaturesByCountry.csv
The goal is to show how the fxtract package can support climate and economical analysis.
library(fxtract)
library(lubridate)
library(tidyverse)
library(stringr)
df_glob_temp = read.csv("GlobalLandTemperaturesByCountry.csv")
str(df_glob_temp)
## 'data.frame': 577462 obs. of 4 variables:
## $ dt : Factor w/ 3239 levels "1743-11-01","1743-12-01",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ AverageTemperature : num 4.38 NA NA NA NA ...
## $ AverageTemperatureUncertainty: num 2.29 NA NA NA NA ...
## $ Country : Factor w/ 243 levels "Ã…land","Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
For our analysis we consider only years since 1900. The main reason for this is that in recent years we have had many missing values for some countries.
df_glob_temp$dt = as.Date(df_glob_temp$dt)
df1900_raw = df_glob_temp %>% filter(dt >= "1900-01-01")
Removal of duplicates and some smaller regions.
df1900 = df1900_raw %>% filter(!Country %in% c('Denmark', 'Antarctica', 'France', 'Europe', 'Netherlands',
'United Kingdom', 'South America', 'Ã…land', 'Africa', 'Asia', 'Baker Island', 'Curaçao', 'Kingman Reef', 'North America', 'Oceania',
'Palmyra Atoll', 'Saint Barthélemy', 'Saint Martin', 'Virgin Islands'))
df1900$Country = gsub("\\(Europe)", "", df1900$Country)
For an annual view we would like to have the year in a separate column.
df1900$year = year(df1900$dt)
temp_stats = function(data) {
allYears = unique(data$year)
data = data %>% group_by(year) %>%
summarise(
mean = mean(AverageTemperature, na.rm = T),
sd = sd(AverageTemperature, na.rm = T),
min = min(AverageTemperature, na.rm = T),
max = max(AverageTemperature, na.rm = T)
)
data = data %>% as.data.frame()
res = c(mean = data$mean,
sd = data$sd,
min = data$min,
max = data$max)
allYears = unique(df1900$year)
newnames = c(paste0("mean_", allYears),
paste0("sd_", allYears),
paste0("min_", allYears),
paste0("max_", allYears))
names(res) = newnames
res
}
xtractor = Xtractor$new("xtractor")
xtractor$n_cores = 2
xtractor$add_data(df1900, group_by = "Country")
xtractor$add_feature(temp_stats)
xtractor$calc_features()
library(knitr)
library(kableExtra)
res = xtractor$results %>% gather(key = "key", value = "value", -Country) %>%
separate(key, c("key", "year")) %>% select(Country, year, key, value) %>%
as.data.frame()
res$value[is.infinite(res$value)] <- NA
res %>% slice(1:20) %>% kable(col.names = c("Country", "Year", "Key", "Value")) %>%
kable_styling() %>%
scroll_box(width = "100%", height = "400px")
| Country | Year | Key | Value |
|---|---|---|---|
| Afghanistan | 1900 | mean | 13.749333 |
| Albania | 1900 | mean | 13.068583 |
| Algeria | 1900 | mean | 22.864167 |
| American Samoa | 1900 | mean | 26.273500 |
| Andorra | 1900 | mean | 11.348333 |
| Angola | 1900 | mean | 21.789917 |
| Anguilla | 1900 | mean | 26.406750 |
| Antigua And Barbuda | 1900 | mean | 26.229917 |
| Argentina | 1900 | mean | 14.807250 |
| Armenia | 1900 | mean | 8.243750 |
| Aruba | 1900 | mean | 28.027917 |
| Australia | 1900 | mean | 21.766583 |
| Austria | 1900 | mean | 6.670250 |
| Azerbaijan | 1900 | mean | 10.582500 |
| Bahamas | 1900 | mean | 25.057417 |
| Bahrain | 1900 | mean | 25.899250 |
| Bangladesh | 1900 | mean | 25.219083 |
| Barbados | 1900 | mean | 26.279167 |
| Belarus | 1900 | mean | 5.820000 |
| Belgium | 1900 | mean | 9.787333 |
limo_slope = function(data){
lin_model = lm(temp ~ year, data = data)
slope = lin_model$coefficients[[2]]
c("slope" = slope)
}
df = res %>% filter(key == "mean") %>% select(temp = value, everything())
df$year = as.numeric(df$year)
xtractor2 = Xtractor$new("xtractor2")
xtractor2$n_cores = 2
xtractor2$add_data(df, group_by = "Country")
xtractor2$add_feature(limo_slope)
xtractor2$calc_features()
res2 = xtractor2$results
| Country | Slope |
|---|---|
| Afghanistan | 0.0145613 |
| Albania | 0.0080569 |
| Algeria | 0.0125315 |
| American Samoa | 0.0095860 |
| Andorra | 0.0117471 |
| Angola | 0.0089852 |
| Anguilla | 0.0107910 |
| Antigua And Barbuda | 0.0109234 |
| Argentina | 0.0083684 |
| Armenia | 0.0143635 |
| Aruba | 0.0098219 |
| Australia | 0.0087285 |
| Austria | 0.0120316 |
| Azerbaijan | 0.0156783 |
| Bahamas | 0.0091516 |
| Bahrain | 0.0137320 |
| Bangladesh | 0.0071229 |
| Barbados | 0.0111324 |
| Belarus | 0.0138405 |
| Belgium | 0.0105878 |
For a better overview, we can then plot our results in a map.